{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入各种依赖库\n",
    "因为此目录下已经有训练好的模型，可以直接到最后一个代码块运行，无需额外训练，如果与实际应用情况存在偏差，请添加数据集进行再次训练\n",
    "\n",
    "没有的模块进行安装\n",
    "``` bash\n",
    "pip install modual\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import gzip\n",
    "import os\n",
    "import tempfile\n",
    "import cv2\n",
    "import random\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 生成数据集格式"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### train data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入数据图片，以features命名\n",
    "imgs=os.listdir('./datasets/train/imgs/')\n",
    "# 定义一个排序函数\n",
    "def nu_str(string):\n",
    "    return int(string.split('.')[0])\n",
    "# 将文件夹中的文件按照名称数字大小进行排序 能够与labels一一对应\n",
    "imgs.sort(key=nu_str)\n",
    "features_train=[]\n",
    "# 对每一张图片进行处理，主要是将矩阵转化为一个向量，最后将所有图片打包\n",
    "for i in imgs:\n",
    "    img=cv2.imread('./datasets/train/imgs/'+str(i),0)\n",
    "    #res,img=cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)\n",
    "    #img=cv2.copyMakeBorder(img,5,5,5,5,cv2.BORDER_CONSTANT,value=0)\n",
    "    #cv2.imshow('3',img)\n",
    "    #cv2.waitKey(100)\n",
    "    img=img.reshape(28*28)/255\n",
    "    features_train.append(img)\n",
    "features_train=np.array(features_train) # 包含所有图片的一个向量集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 将每一个图片对应的结果转化为one-hot形式储存\n",
    "# 读取文件所有内容\n",
    "with open('./datasets/train/targets/target.txt','r') as f:\n",
    "    tars=f.readlines()\n",
    "# 向量不同位置对应的结果\n",
    "tar_temp=[0,1,2,3,4,5,6,7,8,9,'.']\n",
    "labels_train=[]\n",
    "# 构造one-hot形式的向量集\n",
    "for i in tars:\n",
    "    b=np.array([i[0]==str(tar_temp[j]) for j in range(len(tar_temp))])+0\n",
    "    labels_train.append(b)  # 一个包含所有结果的向量集（与图片集一一对应）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 验证数据集的正确性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5\n",
      "6\n",
      "6\n",
      "5\n",
      "6\n",
      "0\n",
      "5\n",
      "9\n",
      "7\n",
      "10\n",
      "10\n",
      "10\n",
      "10\n",
      "4\n",
      "6\n",
      "0\n",
      "4\n",
      "0\n",
      "8\n",
      "1\n",
      "0\n",
      "0\n",
      "0\n",
      "1\n",
      "0\n",
      "7\n",
      "0\n",
      "10\n",
      "10\n",
      "10\n",
      "10\n",
      "4\n",
      "2\n",
      "2\n",
      "9\n",
      "3\n",
      "0\n",
      "4\n",
      "1\n",
      "0\n",
      "0\n",
      "1\n",
      "0\n",
      "7\n",
      "0\n",
      "7\n",
      "8\n",
      "3\n",
      "2\n",
      "4\n",
      "1\n"
     ]
    }
   ],
   "source": [
    "# 选择运行\n",
    "# 查看数据集与结果是否一一对应，主要看看显示的图片和打印的数字是否一致\n",
    "\n",
    "for i in range(len(features_train)):\n",
    "    cv2.imshow('feature',features_train[i].reshape(28,28))\n",
    "    print(np.argmax(labels_train[i]))\n",
    "    cv2.waitKey(500) # 单张图片的显示时间ms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 生成测试数据集，同训练集一样\n",
    "\n",
    "imgs=os.listdir('./datasets/test/imgs/')\n",
    "def nu_str(string):\n",
    "    return int(string.split('.')[0])\n",
    "imgs.sort(key=nu_str)\n",
    "features_test=[]\n",
    "for i in imgs:\n",
    "    img=cv2.imread('./datasets/test/imgs/'+str(i),0)\n",
    "    #res,img=cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)\n",
    "    #img=cv2.copyMakeBorder(img,5,5,5,5,cv2.BORDER_CONSTANT,value=0)\n",
    "    #cv2.imshow('3',img)\n",
    "    #cv2.waitKey(100)\n",
    "    img=img.reshape(28*28)/255\n",
    "    features_test.append(img)\n",
    "features_test=np.array(features_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('./datasets/test/targets/target.txt','r') as f:\n",
    "    tars=f.readlines()\n",
    "tar_temp=[0,1,2,3,4,5,6,7,8,9,'.']\n",
    "labels_test=[]\n",
    "for i in tars:\n",
    "    b=np.array([i[0]==str(tar_temp[j]) for j in range(len(tar_temp))])+0\n",
    "    labels_test.append(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n",
      "10\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "7\n",
      "0\n",
      "0\n",
      "0\n",
      "0\n",
      "2\n",
      "0\n",
      "0\n",
      "0\n",
      "2\n",
      "6\n",
      "0\n",
      "0\n",
      "0\n",
      "0\n",
      "2\n",
      "9\n",
      "1\n",
      "0\n",
      "0\n",
      "0\n",
      "2\n",
      "9\n",
      "0\n",
      "2\n",
      "8\n",
      "0\n",
      "2\n",
      "3\n",
      "0\n",
      "5\n",
      "6\n",
      "6\n"
     ]
    }
   ],
   "source": [
    "# 查看测试训练集的正确性  可以跳过\n",
    "for i in range(len(features_test)):\n",
    "    cv2.imshow('feature',features_test[i].reshape(28,28))\n",
    "    print(np.argmax(labels_test[i]))\n",
    "    cv2.waitKey(500)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 线性训练\n",
    "这个训练结果太差，忽略"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### train \n",
    "忽略 不需要运行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /home/lthpc/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/util/tf_should_use.py:118: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.\n",
      "Instructions for updating:\n",
      "Use `tf.global_variables_initializer` instead.\n"
     ]
    }
   ],
   "source": [
    "sess = tf.Session()\n",
    "# 设置放置自变量的空间(图片输入的)\n",
    "x=tf.placeholder(\"float\",[None,28*28])\n",
    "# 设置权重以及偏置的变量空间\n",
    "W=tf.Variable(tf.zeros([28*28,11]))\n",
    "b=tf.Variable(tf.zeros([11]))\n",
    "# 计算特定权重偏置下的结果\n",
    "y=tf.nn.softmax(tf.matmul(x,W)+b)\n",
    "# 设置储存实际结果的空间\n",
    "y_=tf.placeholder(\"float\",[None,11])\n",
    "# 计算预测与实际的交叉熵\n",
    "cross_entropy=-tf.reduce_sum(y_*tf.log(y))\n",
    "# 训练 通过使得交叉熵向减小的方向\n",
    "train_step=tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)\n",
    "# 初始化变量\n",
    "init = tf.initialize_all_variables()\n",
    "# 设置学习的session\n",
    "sess.run(init)\n",
    "\n",
    "#设置训练\n",
    "for i in range(10):\n",
    "    # 每次训练随机取50张图片和结果进行训练\n",
    "    sample=random.sample(range(len(labels_train)),50)\n",
    "    # 读取训练图片\n",
    "    batch_xs=np.array([features_train[i] for i in sample])\n",
    "    # 读取训练结果labels\n",
    "    batch_ys=np.array([labels_train[i] for i in sample])\n",
    "    # 训练\n",
    "    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### test\n",
    "不需要运行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.43589744\n"
     ]
    }
   ],
   "source": [
    "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))  # 比较结果和实际结果\n",
    "accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\")) # 正确率计算\n",
    "print(sess.run(accuracy, feed_dict={x: features_test, y_: labels_test})) # 测试\n",
    "sess.close() # 关闭sess"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多重卷积训练\n",
    "正确率极高推荐使用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义权重函数工厂函数（批量生产权重的函数，为了方便）\n",
    "def weight_variable(shape,name):\n",
    "    initial = tf.truncated_normal(shape, stddev=0.1)\n",
    "    return tf.Variable(initial,name=name)\n",
    "# 定义偏置工厂函数\n",
    "def bias_variable(shape,name):\n",
    "    initial = tf.constant(0.1, shape=shape)\n",
    "    return tf.Variable(initial,name=name)\n",
    "# 定义卷积矩阵工厂函数\n",
    "def conv2d(x, W):\n",
    "      return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')\n",
    "# 定义池化层矩阵工厂函数\n",
    "def max_pool_2x2(x):\n",
    "      return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],\n",
    "                        strides=[1, 2, 2, 1], padding='SAME')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 主要有两层卷积运算\n",
    "# 第一层卷积层定义\n",
    "W_conv1 = weight_variable([5, 5, 1, 32],name='w_conv1')  # 权重变量\n",
    "b_conv1 = bias_variable([32],name='b_conv1',)            # 偏置\n",
    "\n",
    "# 图片输入空间生成,结果空间生成（请求组织先分配好茅坑）\n",
    "x = tf.placeholder(\"float\", shape=[None, 28*28],name=\"X\")  # \n",
    "y_ = tf.placeholder(\"float\", shape=[None, 11],name=\"Y\")\n",
    "\n",
    "# 将输入空间重新塑造为28*28*1（1指单通道，-1是指可以随机应变），为了后面的卷积运算 因为输入是一个向量集\n",
    "x_image = tf.reshape(x, [-1,28,28,1]) \n",
    "\n",
    "# 定义卷积矩阵并计算\n",
    "h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)\n",
    "# 定义池化层 \n",
    "h_pool1 = max_pool_2x2(h_conv1)\n",
    "# 第二层卷积层定义\n",
    "W_conv2 = weight_variable([5, 5, 32, 64],name='w_conv2')  # 权重变量\n",
    "b_conv2 = bias_variable([64],name='b_conv2')              # 偏置\n",
    "\n",
    "h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)\n",
    "h_pool2 = max_pool_2x2(h_conv2)\n",
    "\n",
    "# 经过两次卷积和池化，最后的图片只有7*7了，但是还是不知道他到底是什么鬼，所以再来一个权重矩阵，来算算他到底是什么鬼\n",
    "# 第一个与处理后图片尺寸一样的权重矩阵变量和偏置变量，直接点乘\n",
    "W_fc1 = weight_variable([7 * 7 * 64, 1024],name='w_fc1')  \n",
    "b_fc1 = bias_variable([1024],name='b_fc1')\n",
    "\n",
    "h_pool2_flat = tf.reshape(h_pool2, [-1,7*7*64])\n",
    "h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)\n",
    "\n",
    "# keep_prob为了防止过拟合，具体原理我还没看到。。。。\n",
    "keep_prob = tf.placeholder(\"float\",name='keep_prob')\n",
    "h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)\n",
    "# 再来一个矩阵使得其变成一个全连接层，所谓全连接层就是一个向量，之所以要将矩阵化为全连接层\n",
    "# 就是为了使得他通过和再一个权重相乘能够得到和结果维度相同的输出\n",
    "W_fc2 = weight_variable([1024, 11],name='w_fc2')\n",
    "b_fc2 = bias_variable([11],name='b_fc2')\n",
    "# 最后的结果输出为一个向量 和labels相同的维度，\n",
    "y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)\n",
    "# 设置模型格式 ，添加输出的格式进去\n",
    "tf.add_to_collection('yconv',y_conv)\n",
    "saver = tf.train.Saver()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step 0, training accuracy 0.16\n",
      "step 100, training accuracy 1\n",
      "step 200, training accuracy 1\n",
      "test accuracy 0.948718\n"
     ]
    }
   ],
   "source": [
    "# 训练模型\n",
    "with tf.Session() as sess:\n",
    "    # 设置交叉熵为损失函数\n",
    "    cross_entropy = -tf.reduce_sum(y_*tf.log(y_conv))\n",
    "    # 设置优化参数，采用AdamOptimizer优化方法，比最速下降法更优，能够防止过拟合\n",
    "    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)\n",
    "    # 判断预测结果和真实结果是否相同\n",
    "    correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))\n",
    "    # 精度\n",
    "    accuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\n",
    "    # 初始化各个变量\n",
    "    sess.run(tf.initialize_all_variables())\n",
    "    # 迭代训练\n",
    "    for i in range(201):\n",
    "        # 随机选取数据进行训练\n",
    "        sample = random.sample(range(len(labels_train)),50)\n",
    "        batch_xs=np.array([features_train[i] for i in sample])\n",
    "        batch_ys=np.array([labels_train[i] for i in sample])\n",
    "        # 当是100倍数是保存模型，并且输出当前测试精度，保存路径为相对路径\n",
    "        if i%100 == 0:\n",
    "            train_accuracy = accuracy.eval(feed_dict={x:batch_xs, y_: batch_ys, keep_prob: 1.0})\n",
    "            print (\"step %d, training accuracy %g\"%(i, train_accuracy))\n",
    "            save_path = saver.save(sess, \"./datasets/digit_model/my_digit_model\")\n",
    "        train_step.run(feed_dict={x:batch_xs, y_: batch_ys, keep_prob: 0.5})\n",
    "    # 测试整体精度，加载测试集\n",
    "    print (\"test accuracy %g\"%accuracy.eval(feed_dict={x: features_test, y_: labels_test, keep_prob: 1.0}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 加载使用模型  应用训练好的模型\n",
    "模型已经存在相应位置，可以直接使用此代码块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Restoring parameters from ./datasets/digit_model/my_digit_model\n",
      "不好意思，目前不支持多个小数点的数值识别\n",
      "图中数字为..244.244...\n"
     ]
    }
   ],
   "source": [
    "import cv2\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import os\n",
    "\n",
    "\n",
    "\n",
    "# 不支持多行数字识别，以及单行多个小数点的数值识别（单行只能实现字符串识别），\n",
    "# labels的各个位置代表的数字\n",
    "tar_temp=[0,1,2,3,4,5,6,7,8,9,'.'] \n",
    "\n",
    "# 定义一个阈值函数，将数码管部分取出来，根据实际情况进行相应修改，找到最优参数\n",
    "def thresholding_inv(image):\n",
    "    # 定义膨胀核心，根据实际情况进行修改\n",
    "    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_RECT,(1, 6))# 1代表横向膨胀，6代表纵向膨胀\n",
    "    ## 腐蚀参数我已经注释掉，根据实际情况选择是否使用\n",
    "    kernel_erode = cv2.getStructuringElement(cv2.MORPH_RECT,(2, 1)) \n",
    "    ## 根据RGB图得到灰度图\n",
    "    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
    "    # 灰度图二值化\n",
    "    ret, bin = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)\n",
    "    ## 对灰度图进行腐蚀，主要是为了分离相近的小数点，如果足够清晰可以不使用腐蚀，我已注释掉\n",
    "    bin = cv2.erode(bin,kernel_erode)\n",
    "    ## 对灰度图进行膨胀\n",
    "    bin=cv2.dilate(bin,kernel_dilate,iterations = 1)\n",
    "    return bin\n",
    "\n",
    "# Read the input image\n",
    "## demo 图像在此目录下\n",
    "im = cv2.imread('./datasets/2018-09-10_162811_270.jpg')  # 还有 1-6 张图 修改最后一个数即可\n",
    "## 二值化处理\n",
    "im_th = thresholding_inv(im)\n",
    "\n",
    "# 显示图片\n",
    "cv2.imshow('im_th',im_th)\n",
    "cv2.waitKey(10) # 显示1000ms \n",
    "\n",
    "# Find contours in the image  寻找边界集合\n",
    "_,ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
    "# Get rectangles contains each contour \n",
    "rects = [cv2.boundingRect(ctr) for ctr in ctrs]\n",
    "# 加载训练好的模型，并预测通过\n",
    "with tf.Session() as sess:\n",
    "    # 加载模型的结构框架graph\n",
    "    new_saver = tf.train.import_meta_graph('./datasets/digit_model/my_digit_model.meta')\n",
    "    # 加载各种变量\n",
    "    new_saver.restore(sess,'./datasets/digit_model/my_digit_model')\n",
    "    yy_hyp = tf.get_collection('yconv')[0]\n",
    "    graph = tf.get_default_graph() \n",
    "    X = graph.get_operation_by_name('X').outputs[0]#为了将 x placeholder加载出来\n",
    "    keep_prob = graph.get_operation_by_name('keep_prob').outputs[0] # 将keep_prob placeholder加载出来\n",
    "    # mm用来保存数字以及数字坐标\n",
    "    mm={}\n",
    "    # for循环对每一个contour 进行预测和求解，并储存\n",
    "    for rect in rects:\n",
    "        # Draw the rectangles 得到数字区域 roi\n",
    "        cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3) \n",
    "        # Make the rectangular region around the digit\n",
    "        leng1= int(rect[3])\n",
    "        leng2= int(rect[2])\n",
    "        pt1 = int(rect[1] )\n",
    "        pt2 = int(rect[0] )\n",
    "        # 得到数字区域\n",
    "        roi = im_th[pt1:pt1+leng1, pt2:pt2+leng2]\n",
    "        # 尺寸缩放为模型尺寸\n",
    "        roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)\n",
    "        # 处理成一个向量，为了和模型输入一直\n",
    "        roi=np.array([roi.reshape(28*28)/255])\n",
    "        # 运行模型得到预测结果\n",
    "        pred= sess.run(yy_hyp,feed_dict = {X:roi,keep_prob:1.0})\n",
    "        # 得到最大可能值索引 ind\n",
    "        ind=np.argmax(pred)\n",
    "        #labels不同位置代表的不同数字   (tar_temp[ind]) 就是预测值\n",
    "        # 将预测值添加到图像中，并显示\n",
    "        cv2.putText(im, str(tar_temp[ind]), (rect[0], rect[1]),cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)\n",
    "        # 储存每个数字和其对应的boundingbox的像素点坐标\n",
    "        mm[pt2]=tar_temp[ind]\n",
    "    # 最后的处理\n",
    "    # 根据像素坐标，从左到右排序，得到数字的顺序\n",
    "    num_tup=sorted(mm.items(),key=lambda x:x[0])\n",
    "    # 将数字列表连接为字符串\n",
    "    num=(''.join([str(i[1]) for i in num_tup]))\n",
    "    try:\n",
    "        numn=float(num)\n",
    "        print('图中数字为%s,数值大小为%s' %(num,numn))\n",
    "    except:\n",
    "        print('不好意思，目前不支持多个小数点的数值识别')\n",
    "        print('图中数字为%s'% num)\n",
    "    # 显示图像 \n",
    "    cv2.namedWindow(\"Resulting Image with Rectangular ROIs\", cv2.WINDOW_NORMAL)\n",
    "    cv2.imshow(\"Resulting Image with Rectangular ROIs\", im)\n",
    "    cv2.waitKey(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
